from urllib.request import urlopen
from bs4 import BeautifulSoup
import os, pymysql, time

# Script purpose: for every HTML file in ``targetDir``, take the text of the
# first <h1> (falling back to the first <h2>, then to an empty string) and
# store it in the ``keyword`` column of the ``p_documents`` row whose
# ``file_name`` equals the file's name without its extension.

targetDir = '../../../resources/phphtml/'
realTargetDirPath = os.path.realpath(targetDir)

filelist = os.listdir(realTargetDirPath)

# Parameterized statement; the driver substitutes the %s placeholders, so the
# values are never interpolated into the SQL text by hand. It is the same for
# every file, hence defined once outside the loop.
updateSql = "UPDATE p_documents SET keyword = %s, update_time = %s WHERE file_name = %s"

# Database connection object.
# Keyword arguments are used because recent PyMySQL releases no longer accept
# positional arguments to connect().
# NOTE(review): the credentials are hard-coded in the source; consider moving
# them to environment variables or a config file kept out of version control.
dbConn = pymysql.connect(
    host='112.74.124.220',
    user='dengqihua',
    password='dengqihua520',
    database='phpdoc',
    charset='utf8',
)

try:
    for i, file in enumerate(filelist, start=1):
        filePath = os.path.join(realTargetDirPath, file)

        # os.listdir() also returns sub-directories; open() would fail on
        # them, so only regular files are processed.
        if not os.path.isfile(filePath):
            continue

        nowTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print(str(i) + (' ' * 5) + file)

        # The context manager closes the file even if read() raises.
        with open(filePath, 'r', encoding='UTF-8') as fileObj:
            html = fileObj.read()

        # Name of the current document: the file name without its extension.
        # splitext() leaves a name that has no extension intact, whereas
        # rpartition('.')[0] would yield an empty string for it.
        currName = os.path.splitext(file)[0]

        bs4Obj = BeautifulSoup(html, 'html.parser')

        # Prefer the first <h1>; fall back to the first <h2>; otherwise use
        # an empty keyword.
        heading = bs4Obj.find('h1')
        if heading is None:
            heading = bs4Obj.find('h2')
        keyword = heading.get_text() if heading is not None else ''

        # Update the row; the cursor is closed when the with-block exits,
        # including when execute() or commit() raises.
        print(' ' * 10 + updateSql)
        with dbConn.cursor() as curserObj:
            curserObj.execute(updateSql, (keyword, nowTime, currName))
            dbConn.commit()
finally:
    # Release the connection even when processing a file fails.
    dbConn.close()
